the ranks will be a named num, with
entrezgene_id as name and stat (Wald Test) as
metric
getRanks <- function(res, annot) {
# only taking genes which have entrezgene_ids assigned to them
genes_with_entrez <- select(annot, GeneID, entrezgene_id) %>%
filter(!is.na(entrezgene_id))
ranks <- as.data.frame(res) %>%
tibble::rownames_to_column("GeneID") %>%
merge(genes_with_entrez, by = "GeneID") %>%
arrange(desc(stat)) %>%
select(entrezgene_id, stat) %>%
tibble::deframe() # creating a named num from two columns
return(ranks)
}
ranks.gastroc <- getRanks(res.gastroc, annot)
ranks.soleus <- getRanks(res.soleus, annot)
# TODO: why (again) is the soleus gene count seemingly 300 below gastroc gene count / ranks count
# -> seems because of e.g. the cutoff at DESeq
duplicate entrezgene ids (multiple entrez are mapped to the same gene name)
Thus a quick look if any of these genes can be simply omitted. Like if the gene_type is “other” or “tRNA”
Since all ENSEMBL duplicates are also found in the gene_name duplicates, using the ENSEMBL as id for the ranks would reduce the number of duplicates by 107.
actually most of them are “protein coding” and will not be omitted. to be continued …
Pathways are provided by http://www.gsea-msigdb.org/gsea/msigdb/mouse/collections.jsp
For now the Canonical pathways are used. These gene sets represent biological a biological process. They are composed from the following databases taking a subset of CP:
| database | gene sets |
|---|---|
| BioCarta | 252 |
| Reactome | 1249 |
| WikiPathways | 186 |
fgseaRes <- fgsea(
pathways = CGP,
stats = ranks,
minSize = 15,
maxSize = 200
)
ordering pathways by padj values and using ES to
# ' obtain top pathways ordered by padj and use `ES` for up or down regulation
get_top_pathways <- function(fgseaRes, up = TRUE, pCutoff=params$pCutoff, n=10) {
.updown <- ifelse(up, `>`, `<`)
top.pathways <- fgseaRes %>%
filter(.updown(ES,0), padj < pCutoff) %>%
arrange(padj) %>%
slice_head(n=n)
return(top.pathways)
}
plot for top up and down regulated pathways
# ' plots top n enrichment plots for the given fgsea result
plot_top_enrichment <- function(fgseaRes, pathways, ranks, n = 9, up = TRUE) {
# extracting the top n pathways
top.pathways <- get_top_pathways(fgseaRes, up=up, pCutoff=params$pCutoff, n=n)
plot.list <- list()
# lims <- list("x" = c(0,17000), "y" = c(-0.8,0.0))
for (i in 1:nrow(top.pathways)) {
# filling plot.list with enrichmentPlots
# TODO: how can I use facet_wrap for this?
pathway <- top.pathways[i]$pathway
plt <- plotEnrichment(pathways[[pathway]], ranks) +
# TODO: adjust yaxis to the same scale
# TODO: keep axis.text.x only on the lower row
# TODO: keep axis.text.y only on the right column
theme(
axis.title.x = element_blank(),
axis.title.y = element_blank()
) # +
# coord_cartesian(xlim = lims$x, ylim = lims$y)
plot.list[[i]] <- plt
}
arrange_plts(plot.list)
}
# ' helper function to arragen the plot from the enrichment
arrange_plts <- function(plt.list) {
nplts <- length(plt.list)
plt <- plt.list[1]
xlab <- plt$labels$x
ylab <- plt$labels$y
# set axis to the same scale
lims <- list("x" = c(0, 17000), "y" = c(-0.8, 0.0))
# remove axis
# arrange the plots
fig_labels <- LETTERS[1:nplts]
patchwork::wrap_plots(plt.list, )
figure <- ggpubr::ggarrange(plotlist = plt.list,
labels = fig_labels) %>%
annotate_figure(left = text_grob(ylab, rot = 90),
bottom = text_grob(xlab))
# TODO: remove all x-axis labels except lower row
# get dimensions
figure$layers
return(figure)
}
# plot_labels <-
# data.frame("label" = LETTERS[1:10], "pathway" = top.pathways)
# knitr::kable(caption = "plot labels", plot_labels)
plot_top_enrichment(fgseaRes.gastroc, CGP, ranks.gastroc, up=T)
# TODO: add plot labels to return argument of plot_top_enrichment (use list probably)
plot_labels <-
data.frame("label" = LETTERS[1:9], "pathway" = get_top_pathways(fgseaRes.gastroc, up=T, n=9)$pathway)
knitr::kable(caption = "plot labels", plot_labels)
| label | pathway |
|---|---|
| A | WP_TYROBP_CAUSAL_NETWORK_IN_MICROGLIA |
| B | WP_MICROGLIA_PATHOGEN_PHAGOCYTOSIS_PATHWAY |
| C | WP_APOPTOSIS |
| D | REACTOME_IMMUNOREGULATORY_INTERACTIONS_BETWEEN_A_LYMPHOID_AND_A_NON_LYMPHOID_CELL |
| E | WP_FIBRIN_COMPLEMENT_RECEPTOR_3_SIGNALING_PATHWAY |
| F | WP_CHEMOKINE_SIGNALING_PATHWAY |
| G | BIOCARTA_TNFR2_PATHWAY |
| H | REACTOME_DAP12_INTERACTIONS |
| I | REACTOME_FCGAMMA_RECEPTOR_FCGR_DEPENDENT_PHAGOCYTOSIS |
plot_top_enrichment(fgseaRes.gastroc, CGP, ranks.gastroc, up=F)
plot_labels <-
data.frame("label" = LETTERS[1:9], "pathway" = get_top_pathways(fgseaRes.gastroc, up=F, n=9)$pathway)
knitr::kable(caption = "plot labels", plot_labels)
| label | pathway |
|---|---|
| A | REACTOME_THE_CITRIC_ACID_TCA_CYCLE_AND_RESPIRATORY_ELECTRON_TRANSPORT |
| B | REACTOME_RESPIRATORY_ELECTRON_TRANSPORT_ATP_SYNTHESIS_BY_CHEMIOSMOTIC_COUPLING_AND_HEAT_PRODUCTION_BY_UNCOUPLING_PROTEINS |
| C | REACTOME_RESPIRATORY_ELECTRON_TRANSPORT |
| D | WP_ELECTRON_TRANSPORT_CHAIN |
| E | REACTOME_COMPLEX_I_BIOGENESIS |
| F | REACTOME_MITOCHONDRIAL_TRANSLATION |
| G | REACTOME_KEAP1_NFE2L2_PATHWAY |
| H | REACTOME_CELLULAR_RESPONSE_TO_HYPOXIA |
| I | REACTOME_CELLULAR_RESPONSE_TO_CHEMICAL_STRESS |
plot_top_enrichment(fgseaRes.soleus, CGP, ranks.soleus, up=T)
plot_labels <-
data.frame("label" = LETTERS[1:9], "pathway" = get_top_pathways(fgseaRes.soleus, up=T, n=9)$pathway)
knitr::kable(caption = "plot labels", plot_labels)
| label | pathway |
|---|---|
| A | REACTOME_SRP_DEPENDENT_COTRANSLATIONAL_PROTEIN_TARGETING_TO_MEMBRANE |
| B | REACTOME_FORMATION_OF_A_POOL_OF_FREE_40S_SUBUNITS |
| C | REACTOME_NONSENSE_MEDIATED_DECAY_NMD_INDEPENDENT_OF_THE_EXON_JUNCTION_COMPLEX_EJC |
| D | WP_CYTOPLASMIC_RIBOSOMAL_PROTEINS |
| E | WP_TYROBP_CAUSAL_NETWORK_IN_MICROGLIA |
| F | REACTOME_EUKARYOTIC_TRANSLATION_INITIATION |
| G | REACTOME_NONSENSE_MEDIATED_DECAY_NMD |
| H | REACTOME_MAJOR_PATHWAY_OF_RRNA_PROCESSING_IN_THE_NUCLEOLUS_AND_CYTOSOL |
| I | REACTOME_PRC2_METHYLATES_HISTONES_AND_DNA |
plot_top_enrichment(fgseaRes.soleus, CGP, ranks.soleus, up=F)
plot_labels <-
data.frame("label" = LETTERS[1:9], "pathway" = get_top_pathways(fgseaRes.soleus, up=F, n=9)$pathway)
knitr::kable(caption = "plot labels", plot_labels)
| label | pathway |
|---|---|
| A | REACTOME_KEAP1_NFE2L2_PATHWAY |
| B | REACTOME_CELLULAR_RESPONSE_TO_CHEMICAL_STRESS |
| C | REACTOME_GLI3_IS_PROCESSED_TO_GLI3R_BY_THE_PROTEASOME |
| D | REACTOME_UBIQUITIN_MEDIATED_DEGRADATION_OF_PHOSPHORYLATED_CDC25A |
| E | REACTOME_RUNX1_REGULATES_TRANSCRIPTION_OF_GENES_INVOLVED_IN_DIFFERENTIATION_OF_HSCS |
| F | REACTOME_CELLULAR_RESPONSE_TO_HYPOXIA |
| G | REACTOME_DEGRADATION_OF_DVL |
| H | REACTOME_ABC_FAMILY_PROTEINS_MEDIATED_TRANSPORT |
| I | REACTOME_ASYMMETRIC_LOCALIZATION_OF_PCP_PROTEINS |
top significant pathways:
# creating up and down regulated pathway vectors separately to maintain order
topUp <- get_top_pathways(fgseaRes.gastroc, up=T, pCutoff = params$pCutoff, n=10)
topDown <- get_top_pathways(fgseaRes.gastroc, up=F, pCutoff = params$pCutoff, n=10)
topPathways <- bind_rows(topUp, topDown) %>%
arrange(-NES) %>%
pull(pathway)
plotGseaTable(
pathways = CGP[topPathways],
stats = ranks.gastroc,
fgseaRes = fgseaRes.gastroc,
gseaParam = 0.5,
render = TRUE
) %>%
ggpubr::as_ggplot() # needed since, for whatever reason only `NULL` gets returned if `plotGseaTable` is rendered inline
top significant pathways:
# creating up and down regulated pathway vectors separately to maintain order
topUp <- get_top_pathways(fgseaRes.soleus, up=T, pCutoff = params$pCutoff, n=10)
topDown <- get_top_pathways(fgseaRes.soleus, up=F, pCutoff = params$pCutoff, n=10)
topPathways <- bind_rows(topUp, topDown) %>%
arrange(-NES) %>%
pull(pathway)
plotGseaTable(
pathways = CGP[topPathways],
stats = ranks.soleus,
fgseaRes = fgseaRes.soleus,
gseaParam = 0.5,
render = TRUE
) %>%
ggpubr::as_ggplot() # needed since, for whatever reason only `NULL` gets returned if `plotGseaTable` is rendered inline
using NES from the fgsea result filtering on the set
`pCutoff=`0.01 yields the following plot:
pCutoff <- params$pCutoff
res.combined <- merge(
data.frame(fgseaRes.gastroc[, c("pathway", "NES", "padj")]),
data.frame(fgseaRes.soleus[, c("pathway", "NES", "padj")]),
by = "pathway",
suffixes = c(".ga", ".sol")
) %>%
filter(padj.ga < pCutoff | padj.sol < pCutoff) %>%
mutate(
diff.exp = case_when(
NES.ga < 0 & NES.sol < 0 & padj.ga < pCutoff & padj.sol < pCutoff ~ "both down",
NES.ga > 0 & NES.sol > 0 & padj.ga < pCutoff & padj.sol < pCutoff ~ "both up",
NES.ga < 0 & NES.sol > 0 & padj.ga < pCutoff & padj.sol < pCutoff ~ "ga down, sol up",
NES.ga > 0 & NES.sol < 0 & padj.ga < pCutoff & padj.sol < pCutoff ~ "ga up, sol down",
NES.ga < 0 & padj.ga < pCutoff & padj.sol > pCutoff ~ "ga down",
NES.ga > 0 & padj.ga < pCutoff & padj.sol > pCutoff ~ "ga up",
NES.sol < 0 & padj.ga > pCutoff & padj.sol < pCutoff ~ "sol down",
NES.sol > 0 & padj.ga > pCutoff & padj.sol < pCutoff ~ "sol up",
TRUE ~ "different"
)
)
# final plot
p <- ggplot(res.combined, aes(x = NES.ga, y = NES.sol, text=pathway)) +
geom_vline(xintercept = 0) +
geom_hline(yintercept = 0) +
geom_point(aes(color = diff.exp)) +
# scale_color_manual(values = c("red", "chartreuse1", "bisque", "royalblue")) +
labs(x = "gastroc", y = "soleus") +
# ggrepel::geom_label_repel(max.overlaps = 20) +
ggtitle(label = "NES")
plotly::ggplotly(p, tooltip = "all")
ggplot(res.combined, aes(x = diff.exp)) +
geom_bar(aes(fill = diff.exp))
pathways will be sorted here by the absolute sum of the NES…
res.combined %>%
filter(diff.exp == "both down") %>%
mutate(NES = abs(NES.ga) + abs(NES.sol)) %>%
select(pathway, NES) %>%
arrange(desc(NES)) %>%
rmarkdown::paged_table(options = list(cols.print = 2))
res.combined %>%
filter(diff.exp == "both up") %>%
mutate(NES = abs(NES.ga) + abs(NES.sol)) %>%
select(pathway, NES) %>%
arrange(desc(NES)) %>%
rmarkdown::paged_table(options = list(cols.min.print = 2))
res.combined %>%
filter(diff.exp == "ga up") %>%
mutate(NES = abs(NES.ga) + abs(NES.sol)) %>%
select(pathway, NES) %>%
arrange(desc(NES)) %>%
rmarkdown::paged_table(options = list(cols.print = 2))
[ ] looking at duplicate entrezgene_ids
[ ] finding optimal maxSize (one sided curve)
[ ] find out biological meaning of significant pathways